In [1]:
import pandas as pd
# Source: https://www.kaggle.com/datasets/uom190346a/sleep-health-and-lifestyle-dataset
# NOTE(review): the URL above names a sleep-health dataset, whereas the CSV
# loaded below is a list of languages by number of speakers — confirm the
# actual data source.
file = "data/List of languages by total number of speakers.csv"
df = pd.read_csv(file)

# Drop the " million" unit suffix from the L1 column and convert it to a
# numeric dtype in one step; entries that still cannot be parsed become NaN
# because of errors="coerce".
df["First-language(L1) speakers"] = pd.to_numeric(
    df["First-language(L1) speakers"].str.replace(" million", ""),
    errors="coerce",
)

# Preview the first rows of the cleaned frame.
df.head()
Out[1]:
| Unnamed: 0 | Language | Family | Branch | First-language(L1) speakers | Second-language(L2) speakers | Total speakers(L1+L2) | |
|---|---|---|---|---|---|---|---|
| 0 | 0 | English(excl. creole languages) | Indo-European | Germanic | 372.9 | 1.080 billion[5] | 1.452 billion |
| 1 | 1 | Mandarin Chinese(incl. Standard Chinese, but e... | Sino-Tibetan | Sinitic | 929.0 | 198.7 million[6] | 1.118 billion |
| 2 | 2 | Hindi(excl. Urdu) | Indo-European | Indo-Aryan | 343.9 | 258.3 million[7] | 602.2 million |
| 3 | 3 | Spanish | Indo-European | Romance | 474.7 | 73.6 million[8] | 548.3 million |
| 4 | 4 | French | Indo-European | Romance | 79.9 | 194.2 million[9] | 274.1 million |
In [2]:
import plotly.express as px
# Build a treemap of languages, with tiles sized and coloured by the number of
# first-language (L1) speakers.
# `path` is read from root to leaves, so the broader grouping (Family, e.g.
# Indo-European) has to come before its sub-group (Branch, e.g. Germanic).
# Listing Branch first would nest each family inside its own branch.
fig = px.treemap(
    df,
    path=["Family", "Branch", "Language"],
    values="First-language(L1) speakers",
    color="First-language(L1) speakers",
    color_continuous_scale=px.colors.sequential.Mint,
    title="Idiomes més parlats al mon (x milions de parlants com a llengua materna)",
)
fig.show()